
from tqdm import tqdm
import json

from medical_ner.utils.constants import EN_DICT

# Debug script: take the first record of ./training.txt, turn its entity
# annotations into a per-character tag list (B-/M-/E-/S- prefixes followed by
# the entity's label_type) and print the list after every entity is applied.
with open('./training.txt', 'r', encoding='utf-8') as reader:
    # Only the first line of the file is processed ([:1]).
    for item in tqdm(reader.readlines()[:1]):

        # SECURITY NOTE(review): eval() executes whatever expression the line
        # contains. This is only acceptable while training.txt is a trusted,
        # locally produced file; if each line is a plain dict literal / JSON
        # object, switch to ast.literal_eval or json.loads.
        line = eval(item.strip())
        originalText = line['originalText']
        # One slot per character of the text; 0 means "no entity tag assigned".
        datatag = [0 for _ in originalText]
        print(datatag)
        print("==="*11)
        entities = line['entities']
        for entity in entities:
            label_type = entity['label_type']
            start_pos = entity['start_pos']
            end_pos = entity['end_pos']
            # NOTE(review): the mapped value is never used below -- the tags
            # are built from label_type itself. The lookup is kept because it
            # raises KeyError for a label_type missing from EN_DICT. Confirm
            # whether the tags were meant to use `tag` instead.
            tag = EN_DICT[label_type]
            # end_pos is treated as exclusive: the last tagged index is
            # end_pos - 1, matching where the E- tag is written below
            # (assumption to confirm against the dataset). The span length is
            # therefore end_pos - start_pos; the previous "+ 1" sent
            # single-character entities into the B/E branch (ending up tagged
            # E-) and tagged empty spans as S-.
            span_len = end_pos - start_pos
            if span_len <= 0:
                # Empty or inverted span: there is no character to tag.
                pass
            elif span_len == 1:
                datatag[start_pos] = f"S-{label_type}"
            else:
                datatag[start_pos] = f"B-{label_type}"
                # Interior characters, i.e. everything strictly between the
                # first and the last character of the span.
                datatag[start_pos + 1:end_pos - 1] = [f"M-{label_type}"] * (span_len - 2)
                datatag[end_pos - 1] = f"E-{label_type}"
            print(datatag)